# Alex F. Gazmararian
# agazmararian@gmail.com

library(here)
source(here("analysis", "visibility", "analysis_config.R"))
source(here("R", "load_functions.R"))
library(igraph)
library(ggraph)

# Configuration ----
# PNAS mode: when TRUE (default), only outputs for the paper are generated
if (!exists("pnas")) {
  pnas <- getOption("pnas.mode", TRUE)
}

# Configuration for robustness checks
# Set to NULL for primary analysis, or specify robustness check identifier
if (!exists("ROBUSTNESS_CHECK")) {
  ROBUSTNESS_CHECK <- NULL  # Default to primary analysis
}

# When in PNAS mode AND running robustness checks, skip most robustness outputs
skip_robustness_tables <- pnas && !is.null(ROBUSTNESS_CHECK)

# Helper function to create input/output file paths
create_analysis_paths <- function(robustness_check = NULL) {
  if (is.null(robustness_check)) {
    list(
      input_file = here("data", "output", "statements_analysis.csv"),
      suffix = "",
      description = "primary",
      credit_panel_fig = "5"
    )
  } else {
    list(
      input_file = here("data", "output", sprintf("statements_analysis_%s.csv", robustness_check)),
      suffix = sprintf("_%s", robustness_check),
      description = robustness_check,
      credit_panel_fig = "S14"
    )
  }
}

# Create paths based on configuration
analysis_paths <- create_analysis_paths(ROBUSTNESS_CHECK)

# Load data
message(sprintf("=== RUNNING %s STATEMENTS ANALYSIS ===", toupper(analysis_paths$description)))
message(sprintf("Loading data from: %s", analysis_paths$input_file))
if (!is.null(ROBUSTNESS_CHECK)) {
  message(sprintf("Output files will be saved with suffix: %s", analysis_paths$suffix))
}
g <- read_csv(analysis_paths$input_file, show_col_types = FALSE)

# Check statement distribution
n_potential_statements <- nrow(g)
n_identified_statements <- nrow(g %>% filter(gave_statement == 1))

if (is.null(ROBUSTNESS_CHECK)) {
  cat(as.character(n_potential_statements), file = "output/pnas/stats/n_potential_statements.txt")
  cat(as.character(n_identified_statements), file = "output/pnas/stats/n_identified_statements.txt")
}

# Configuration-----
source(here("R", "coefmap.R"))

# Model specification
covar.list <- c(
  "party", "sector", "target_jobs_bin", "capital_bin", "status", "manufacturing_bin", "highway",
  "college_acs_z", "poverty_acs_z", "bb100_bin", "gdp_ln_z", "urate_z", "force_ln_z", "income_pc_z",
  "demshare_major_z", "rep_party", "foreign_acs_z", "housing_acs_z",
  "gov_party", "eprice_z", "union_mem_z", "swingstate", "is_competitive_cong", "factor(year)"
)

# Helper function to standardize actor labels
standardize_actor <- function(actor) {
  case_when(
    actor == "senator" ~ "Senator",
    actor == "senator_1_text" ~ "Senator",
    actor == "senator_2_text" ~ "Senator",
    actor == "governor_text" ~ "Governor",
    actor == "company_text" ~ "Company",
    actor == "representative_text" ~ "U.S. Rep",
    actor == "biden_text" ~ "Biden",
    TRUE ~ actor
  )
}

# Helper function to summarize statements
summarize_statements <- function(data, group_vars) {
  data %>%
    mutate(actor = ifelse(grepl("senator", actor), "senator", actor)) %>%
    group_by(across(all_of(group_vars)), id) %>%
    reframe(gave_statement = as.integer(sum(gave_statement, na.rm = TRUE) > 0)) %>%
    group_by(across(all_of(group_vars))) %>%
    summarize(
      n_projects = n_distinct(id),
      n_projects_with_statement = sum(gave_statement),
      pct_projects_with_statement = n_projects_with_statement / n_projects,
      .groups = "drop"
    ) %>%
    mutate(actor = standardize_actor(actor))
}

# Helper function to plot statements
plot_statements <- function(summary_df, x, fill = NULL, title, ylab = NULL, fill_palette = NULL) {
  x_sym <- rlang::sym(x)
  fill_sym <- if (!is.null(fill)) rlang::sym(fill) else NULL
  summary_df <- summary_df %>%
    mutate(!!x_sym := factor(!!x_sym, levels = unique(arrange(summary_df, desc(pct_projects_with_statement))[[x]])))
  p <- ggplot(summary_df, aes(x = !!x_sym, y = pct_projects_with_statement, fill = if (!is.null(fill_sym)) !!fill_sym else NULL)) +
    geom_col(position = if (!is.null(fill)) "dodge" else "stack") +
    geom_text(
      aes(label = paste0(round(pct_projects_with_statement * 100, 0), "%")),
      position = if (!is.null(fill)) position_dodge(width = 0.9) else position_stack(vjust = 1),
      vjust = -.5,
      size = plot_text_size
    ) +
    theme_classic(base_size = 10) +
    scale_x_discrete(expand = c(0, 0), labels = label_wrap_gen(20)) +
    scale_y_continuous(expand = c(0, 0), labels = scales::percent_format(accuracy = 1), limits = c(0, 1.05)) +
    labs(title = title, x = NULL, y = ylab, fill = NULL) +
    theme(
      panel.grid = element_blank(),
      legend.position = "inside",
      legend.position.inside = c(.85, .85),
      axis.ticks = element_blank(),
      axis.text.y = element_text(color = "black"),
      axis.title.x = element_text(color = "black"),
      axis.title.y = element_text(color = "black"),
      plot.title = element_text(color = "black"),
      plot.subtitle = element_text(color = "black")
      )
  if (!is.null(fill_palette)) {
    p <- p + fill_palette
  }
  p
}

# Helper for factor assignment
set_factor_levels <- function(df, col, levels) {
  df[[col]] <- factor(df[[col]], levels = levels)
  df
}

# Create summary statistics table (skip for robustness checks)----
if (is.null(ROBUSTNESS_CHECK)) {

# Create comprehensive summary statistics table for all covariates used in models
summary_vars <- c(
  # Categorical variables (will show proportions)
  "party", "sector", "target_jobs_bin", "capital_bin", "status", "manufacturing_bin", 
  "highway", "bb100_bin", "rep_party", "gov_party", "swingstate", "is_competitive_cong",
  
  # Continuous variables (will show mean, sd, min, max)
  "college_acs_z", "poverty_acs_z", "gdp_ln_z", "urate_z", "force_ln_z", "income_pc_z",
  "demshare_major_z", "foreign_acs_z", "housing_acs_z", "eprice_z", "union_mem_z"
)

# Function to create summary statistics
create_summary_stats <- function(data, vars) {
  summary_list <- list()
  
  for (var in vars) {
    if (var %in% names(data)) {
      if (is.numeric(data[[var]])) {
        # For continuous variables
        stats <- tibble(
          Variable = var,
          Type = "Continuous",
          N = sum(!is.na(data[[var]])),
          Mean_num = round(mean(data[[var]], na.rm = TRUE), 3),
          SD = round(sd(data[[var]], na.rm = TRUE), 3),
          Min = round(min(data[[var]], na.rm = TRUE), 3),
          Max = round(max(data[[var]], na.rm = TRUE), 3),
          Missing = sum(is.na(data[[var]])),
          Mean_char = as.character(round(mean(data[[var]], na.rm = TRUE), 3))
        )
        summary_list[[var]] <- stats
      } else {
        # For categorical variables
        freq_table <- table(data[[var]], useNA = "ifany")
        total_n <- length(data[[var]])
        missing_n <- sum(is.na(data[[var]]))
        
        # Create a summary for categorical variables
        stats <- tibble(
          Variable = var,
          Type = "Categorical",
          N = total_n - missing_n,
          Mean_num = NA_real_,
          SD = NA_real_,
          Min = NA_real_,
          Max = NA_real_,
          Missing = missing_n,
          Mean_char = paste(names(freq_table), " (", round(100 * freq_table / total_n, 1), "%)", 
                           collapse = "; ", sep = "")
        )
        summary_list[[var]] <- stats
      }
    }
  }
  
  # Combine all summaries and create final Mean column
  result <- bind_rows(summary_list) %>%
    mutate(
      Mean = ifelse(Type == "Continuous", Mean_char, Mean_char)
    ) %>%
    select(Variable, Type, N, Mean, SD, Min, Max, Missing)
  
  return(result)
}

# Create summary for full dataset
summary_stats_full <- create_summary_stats(g, summary_vars)

# Create summary by actor type (excluding companies and Biden for party-related variables)
summary_stats_by_actor <- g %>%
  filter(actor != "company_text" & actor != "biden_text") %>%
  create_summary_stats(summary_vars)

# Add year variable summary
year_summary <- g %>%
  group_by(year) %>%
  summarise(
    n_observations = n(),
    n_projects = n_distinct(id),
    .groups = "drop"
  )

# Create a more detailed summary table with variable descriptions
variable_descriptions <- tibble(
  Variable = c(
    "party", "sector", "target_jobs_bin", "capital_bin", "status", "manufacturing_bin",
    "highway", "bb100_bin", "rep_party", "gov_party", "swingstate", "is_competitive_cong",
    "college_acs_z", "poverty_acs_z", "gdp_ln_z", "urate_z", "force_ln_z", "income_pc_z",
    "demshare_major_z", "foreign_acs_z", "housing_acs_z", "eprice_z", "union_mem_z"
  ),
  Description = c(
    "Speaker party affiliation (D/R)",
    "Project sector",
    "Target jobs (binary)",
    "Capital investment (binary)",
    "Project status",
    "Manufacturing project (binary)",
    "Highway access (binary)",
    "Broadband 100+ Mbps (binary)",
    "Republican representative (binary)",
    "Republican governor (binary)",
    "Swing state (binary)",
    "Competitive congressional district (binary)",
    "College education (standardized)",
    "Poverty rate (standardized)",
    "GDP log (standardized)",
    "Unemployment rate (standardized)",
    "Labor force log (standardized)",
    "Income per capita (standardized)",
    "Democratic vote share (standardized)",
    "Foreign-born population (standardized)",
    "Housing costs (standardized)",
    "Electricity price (standardized)",
    "Union membership (standardized)"
  )
)

# Merge descriptions with summary stats
summary_stats_final <- summary_stats_full %>%
  left_join(variable_descriptions, by = "Variable") %>%
  select(Variable, Description, Type, N, Mean, SD, Min, Max, Missing) %>%
  arrange(Type, Variable)

# Print summary information
cat("Summary Statistics for Regression Covariates\n")
cat("===========================================\n\n")
cat("Total observations:", nrow(g), "\n")
cat("Unique projects:", n_distinct(g$id), "\n")
cat("Unique states:", n_distinct(g$state), "\n\n")

cat("Observations by year:\n")
print(year_summary)

cat("\nObservations by actor:\n")
print(table(g$actor))

cat("\nMissing data check for key covariates:\n")
missing_check <- g %>%
  select(all_of(summary_vars[summary_vars %in% names(g)])) %>%
  summarise(across(everything(), ~ sum(is.na(.)))) %>%
  pivot_longer(everything(), names_to = "Variable", values_to = "Missing") %>%
  arrange(desc(Missing))
print(missing_check)

# Create dummy variables for categorical variables
create_dummy_stats <- function(data, vars) {
  dummy_stats <- list()
  
  for (var in vars) {
    if (var %in% names(data)) {
      if (is.numeric(data[[var]])) {
        # For continuous variables, keep as is
        stats <- tibble(
          Variable = var,
          Type = "Continuous",
          N = sum(!is.na(data[[var]])),
          Mean = round(mean(data[[var]], na.rm = TRUE), 3),
          SD = round(sd(data[[var]], na.rm = TRUE), 3),
          Min = round(min(data[[var]], na.rm = TRUE), 3),
          Max = round(max(data[[var]], na.rm = TRUE), 3),
          Missing = sum(is.na(data[[var]]))
        )
        dummy_stats[[var]] <- stats
      } else {
        # For categorical variables, create dummy variables
        unique_vals <- unique(data[[var]][!is.na(data[[var]])])
        
        for (val in unique_vals) {
          dummy_var <- paste0(var, "_", gsub("[^A-Za-z0-9]", "_", val))
          dummy_vec <- as.numeric(data[[var]] == val)
          dummy_vec[is.na(data[[var]])] <- NA
          
          stats <- tibble(
            Variable = dummy_var,
            Type = "Dummy",
            N = sum(!is.na(dummy_vec)),
            Mean = round(mean(dummy_vec, na.rm = TRUE), 3),
            SD = round(sd(dummy_vec, na.rm = TRUE), 3),
            Min = round(min(dummy_vec, na.rm = TRUE), 3),
            Max = round(max(dummy_vec, na.rm = TRUE), 3),
            Missing = sum(is.na(dummy_vec))
          )
          dummy_stats[[dummy_var]] <- stats
        }
      }
    }
  }
  
  bind_rows(dummy_stats)
}

# Create subsets for variables with applicability restrictions
# party: only for elected officials (exclude companies and Biden)
g_elected <- g %>% filter(!actor %in% c("company_text", "biden_text"))
# rep_party: only for representatives  
g_reps <- g %>% filter(actor == "representative_text")
# gov_party: only for governors
g_govs <- g %>% filter(actor == "governor_text")

# Variables that apply to all observations
vars_all <- summary_vars[!summary_vars %in% c("party", "rep_party", "gov_party")]

# Create summary stats for universally applicable variables
summary_stats_dummies <- create_dummy_stats(g, vars_all)

# Add party stats (for politicians only)
summary_stats_party <- create_dummy_stats(g_elected, "party")
summary_stats_dummies <- bind_rows(summary_stats_dummies, summary_stats_party)

# Add rep_party stats (for representatives only)
summary_stats_rep_party <- create_dummy_stats(g_reps, "rep_party")
summary_stats_dummies <- bind_rows(summary_stats_dummies, summary_stats_rep_party)

# Add gov_party stats (for governors only)
summary_stats_gov_party <- create_dummy_stats(g_govs, "gov_party")
summary_stats_dummies <- bind_rows(summary_stats_dummies, summary_stats_gov_party)

# Add statement character count (only for statements that exist)
statement_chars <- g %>% 
  filter(gave_statement == 1) %>%
  mutate(statement_nchar = nchar(statement))

statement_char_stats <- tibble(
  Variable = "statement_nchar",
  Type = "Continuous",
  N = sum(!is.na(statement_chars$statement_nchar)),
  Mean = round(mean(statement_chars$statement_nchar, na.rm = TRUE), 0),
  SD = round(sd(statement_chars$statement_nchar, na.rm = TRUE), 0),
  Min = min(statement_chars$statement_nchar, na.rm = TRUE),
  Max = max(statement_chars$statement_nchar, na.rm = TRUE),
  Missing = sum(is.na(statement_chars$statement_nchar))
)

# Add missing location indicator (missing if fips is NA)
missing_geo <- as.numeric(is.na(g$fips))
missing_geo_stats <- tibble(
  Variable = "missing_geo",
  Type = "Dummy",
  N = length(missing_geo),
  Mean = round(mean(missing_geo, na.rm = TRUE), 3),
  SD = round(sd(missing_geo, na.rm = TRUE), 3),
  Min = 0,
  Max = 1,
  Missing = 0
)

summary_stats_dummies <- bind_rows(statement_char_stats, missing_geo_stats, summary_stats_dummies)

# Create LaTeX table for publication using dummy variables
summary_table_latex <- summary_stats_dummies %>%
  mutate(
    # Clean variable names for display
    Variable_clean = case_when(
      # Statement characteristics
      Variable == "statement_nchar" ~ "Statement character count",
      Variable == "missing_geo" ~ "Missing location (FIPS)",
      # Continuous variables
      Variable == "college_acs_z" ~ "College education (z)",
      Variable == "poverty_acs_z" ~ "Poverty rate (z)",
      Variable == "gdp_ln_z" ~ "GDP log (z)",
      Variable == "urate_z" ~ "Unemployment rate (z)",
      Variable == "force_ln_z" ~ "Labor force log (z)",
      Variable == "income_pc_z" ~ "Income per capita (z)",
      Variable == "demshare_major_z" ~ "Democratic vote share (z)",
      Variable == "foreign_acs_z" ~ "Foreign-born population (z)",
      Variable == "housing_acs_z" ~ "Housing costs (z)",
      Variable == "eprice_z" ~ "Electricity price (z)",
      Variable == "union_mem_z" ~ "Union membership (z)",
      Variable == "bb100_bin" ~ "Broadband 100+ Mbps",
      Variable == "capital_bin" ~ "Capital investment",
      Variable == "highway" ~ "Highway access",
      Variable == "manufacturing_bin" ~ "Manufacturing project",
      Variable == "target_jobs_bin" ~ "Target jobs specified",
      Variable == "swingstate" ~ "Swing state",
      Variable == "is_competitive_cong" ~ "Competitive district",
      Variable == "rep_party" ~ "Republican representative",
      Variable == "gov_party" ~ "Republican governor",
      
      # Party dummies
      Variable == "party_D" ~ "Party: Democrat",
      Variable == "party_R" ~ "Party: Republican",
      
      # Sector dummies
      Variable == "sector_Batteries" ~ "Sector: Batteries",
      Variable == "sector_EVs" ~ "Sector: EVs",
      Variable == "sector_Solar" ~ "Sector: Solar",
      Variable == "sector_Wind" ~ "Sector: Wind",
      
      # Status dummies
      Variable == "status_Cancelled_Closed_Paused_Sold_Rumored" ~ "Status: Cancelled/Closed/Paused/Sold/Rumored",
      Variable == "status_Operating" ~ "Status: Operating", 
      Variable == "status_Pilot_Planned_Construction" ~ "Status: Pilot/Planned/Construction",
      
      # Rep party dummies
      Variable == "rep_party_D" ~ "U.S. Rep. Party: Democrat",
      Variable == "rep_party_R" ~ "U.S. Rep. Party: Republican",
      
      # Gov party dummies  
      Variable == "gov_party_D" ~ "Governor Party: Democrat",
      Variable == "gov_party_R" ~ "Governor Party: Republican",
      
      TRUE ~ gsub("_", " ", Variable)
    ),
    # Format all statistics as character for consistent display
    Mean_display = as.character(Mean),
    SD_display = as.character(SD),
    Min_display = as.character(Min),
    Max_display = as.character(Max),
    Missing_display = as.character(Missing)
  ) %>%
  select(Variable_clean, Mean_display, SD_display, Min_display, Max_display, Missing_display) %>%
  rename(
    Variable = Variable_clean,
    Mean = Mean_display,
    SD = SD_display,
    Min = Min_display,
    Max = Max_display,
    Missing = Missing_display
  )

# Create summary table using tt() format to match visibility analysis
summary_table <- summary_table_latex %>%
  tt(
    caption = "Summary statistics for statement analysis covariates",
    notes = paste(
      "Notes: Summary statistics for covariates used in statement regression models.",
      "Statement character count is calculated only for observations with statements.",
      "Continuous and dummy variables show mean, standard deviation, minimum, maximum, and missing values.",
      "Categorical variables are split into dummy variables (0/1) for each category.",
      "Standardized variables (z) are standardized across unique counties (each county weighted equally),",
      "so means in this table may differ from zero due to counties with more statements receiving greater weight.",
      "Party variables are calculated on applicable subsets: speaker party excludes companies and President;",
      "representative party includes only U.S. Rep. observations; governor party includes only governor observations.",
paste0("Full sample $N=", nrow(g), "$; elected officials (excl. Company/Biden) $N=", nrow(g_elected),
             "$; representatives $N=", nrow(g_reps), "$; governors $N=", nrow(g_govs), "$."),
      sep = " "
    ),
    escape = FALSE
  ) %>%
  format_tt(
    digits = 3,
    num_fmt = "significant_cell"
  ) %>%
  setNames(c(" ", "Mean", "SD", "Min", "Max", "Missing")) %>%
  theme_latex(resize_width = 0.8, resize_direction = "both", placement = "H")

# Save LaTeX table
summary_table %>%
  save_tt(here("output", "pnas", "tables", "tab_S30_statements_summary_statistics.tex"), overwrite = TRUE)

cat("\nSummary statistics table saved to:\n")
cat("- LaTeX: output/pnas/tables/tab_S30_statements_summary_statistics.tex\n")

} # End summary statistics section (skip for robustness checks)

# Statement issuing----

# Distribution of statement type
p.statement.type <- g %>%
  pivot_longer(cols = c(company_text_category_1, governor_text_category_1, senator_1_text_category_1, senator_2_text_category_1, representative_text_category_1, biden_text_category_1), names_to = "category", values_to = "statement_type") %>%
  mutate(category = ifelse(grepl("senat", category), "senator", category)) %>%
  group_by(category, statement_type) %>%
  summarize(n = n()) %>%
  group_by(category) %>%
  mutate(
    prop = n / sum(n),
    category = case_when(
      grepl("biden", category) ~ "Biden",
      grepl("governor", category) ~ "Governor",
      grepl("senator", category) ~ "Senator",
      grepl("representative", category) ~ "Representative",
      grepl("company", category) ~ "Company"
    )
  ) %>%
  ggplot(aes(x = category, y = prop, fill = statement_type)) +
  geom_col() +
  theme_classic(base_size = 10) +
  labs(x = "Statement type", y = "Percent", fill = NULL) +
  scale_y_continuous(expand = c(0, 0), labels = scales::percent) +
  scale_fill_viridis_d() +
  theme(
    legend.position = "bottom"
  )

if (!skip_robustness_tables) {
  save_pnas_pdf(p.statement.type,
  here("output", "pnas", "figures", sprintf("fig_S13_statements_type%s.pdf", analysis_paths$suffix)), width = "double", height = 10)
}


## Plots----

proj_n <- n_distinct(g$id)

if (is.null(ROBUSTNESS_CHECK)) {
  cat(as.character(proj_n), file = "output/pnas/stats/proj_n_statements.txt")
}

# Analysis 1: Statements by project-actor
statements_by_actor <- summarize_statements(g, "actor")
statements_by_actor_party <- summarize_statements(g, c("actor", "party_label"))
statements_by_capital <- summarize_statements(g, c("actor", "capital_bin_label"))

# Calculate key statistics
gov.dem <- statements_by_actor_party$pct_projects_with_statement[statements_by_actor_party$actor == "Governor" & statements_by_actor_party$party_label == "Democrat"]
gov.rep <- statements_by_actor_party$pct_projects_with_statement[statements_by_actor_party$actor == "Governor" & statements_by_actor_party$party_label == "Republican"]
sen.dem <- statements_by_actor_party$pct_projects_with_statement[statements_by_actor_party$actor == "Senator" & statements_by_actor_party$party_label == "Democrat"]
sen.rep <- statements_by_actor_party$pct_projects_with_statement[statements_by_actor_party$actor == "Senator" & statements_by_actor_party$party_label == "Republican"]
rep.dem <- statements_by_actor_party$pct_projects_with_statement[statements_by_actor_party$actor == "U.S. Rep" & statements_by_actor_party$party_label == "Democrat"]
rep.rep <- statements_by_actor_party$pct_projects_with_statement[statements_by_actor_party$actor == "U.S. Rep" & statements_by_actor_party$party_label == "Republican"]

# Save statistics for primary analysis only
if (is.null(ROBUSTNESS_CHECK)) {
  # By actor
  cat(as.character(round(statements_by_actor$pct_projects_with_statement[statements_by_actor$actor == "Company"] * 100, 0)), file = "output/pnas/stats/company_statements.txt")
  cat(as.character(round(statements_by_actor$pct_projects_with_statement[statements_by_actor$actor == "Governor"] * 100, 0)), file = "output/pnas/stats/governor_statements.txt")
  cat(as.character(round(statements_by_actor$pct_projects_with_statement[statements_by_actor$actor == "Senator"] * 100, 0)), file = "output/pnas/stats/senator_statements.txt")
  cat(as.character(round(statements_by_actor$pct_projects_with_statement[statements_by_actor$actor == "U.S. Rep"] * 100, 0)), file = "output/pnas/stats/rep_statements.txt")
  cat(as.character(round(statements_by_actor$pct_projects_with_statement[statements_by_actor$actor == "Biden"] * 100, 0)), file = "output/pnas/stats/biden_statements.txt")
  
  # By actor party
  cat(as.character(round(gov.dem[1] * 100, 0)), file = "output/pnas/stats/governor_statements_democrat.txt")
  cat(as.character(round(gov.rep[1] * 100, 0)), file = "output/pnas/stats/governor_statements_republican.txt")
  cat(as.character(round(sen.dem[1] * 100, 0)), file = "output/pnas/stats/senator_statements_democrat.txt")
  cat(as.character(round(sen.rep[1] * 100, 0)), file = "output/pnas/stats/senator_statements_republican.txt")
  cat(as.character(round(rep.dem[1] * 100, 0)), file = "output/pnas/stats/rep_statements_democrat.txt")
  cat(as.character(round(rep.rep[1] * 100, 0)), file = "output/pnas/stats/rep_statements_republican.txt")
}

plot_settings <- list(
  list(
    data = statements_by_actor,
    x = "actor",
    ylab = "Projects with statement (%)",
    fill = NULL,
    title = "Overall",
    filename = sprintf("fig_statements_by_actor%s.pdf", analysis_paths$suffix)
  ),
  list(
    data = statements_by_actor_party %>% filter(actor != "Company" & !is.na(party_label)),
    x = "actor",
    fill = "party_label",
    ylab = "Projects with statement (%)",
    title = "By speaker partisanship",
    filename = sprintf("fig_statements_by_actor_party%s.pdf", analysis_paths$suffix),
    fill_palette = scale_fill_manual(values = c("Democrat" = party_colors$Democrat, "Republican" = party_colors$Republican))
  ),
  list(
    data = statements_by_capital,
    x = "actor",
    fill = "capital_bin_label",
    ylab = "Projects with statement (%)",
    title = "By investment target",
    filename = sprintf("fig_statements_by_capital%s.pdf", analysis_paths$suffix),
    fill_palette = scale_fill_manual(values = c("Investment amount specified" = "grey10", "Not specified" = "grey60"))
  )
)

message("% of projects with investment target specified: ", mean(g$capital_bin))

# Generate and save plots
plots <- list()
for (i in seq_along(plot_settings)) {
  s <- plot_settings[[i]]
  p <- plot_statements(
    s$data,
    s$x,
    s$fill,
    s$title,
    s$ylab,
    fill_palette = if (!is.null(s$fill_palette)) s$fill_palette else NULL
  )
  plots[[i]] <- p
  # Diagnostic figures (not in paper)
  if (isFALSE(pnas)) {
    save_pnas_pdf(p, here("output", "pnas", "figures", s$filename))
  }
}

# Statement giving by actor over time
statements_by_quarter_actor <- summarize_statements(g, c("quarter", "actor"))
plot_data <- statements_by_quarter_actor %>%
  mutate(
    quarter_date = as.Date(paste0(substr(quarter, 1, 4), "-", 
                                  as.numeric(substr(quarter, 7, 7)) * 3 - 2, "-01"))
  )
x_limits <- range(c(plot_data$quarter_date))
x_limits[1] <- x_limits[1] - 30
x_limits[2] <- x_limits[2] + 180
label_data <- plot_data %>%
  group_by(actor) %>%
  filter(quarter_date == max(quarter_date)) %>%
  ungroup()

p.main <- plot_data %>%
  ggplot(aes(x = quarter_date, y = pct_projects_with_statement, color = actor, shape = actor)) +
  geom_line(linewidth = 1.2) +
  geom_point(size = 2.5) +
  geom_text(data = label_data, 
            aes(label = actor, color = actor), 
            hjust = -0.1, vjust = 0.5, size = 3, fontface = "bold") +
  scale_x_date(breaks = sort(unique(plot_data$quarter_date)), 
               labels = function(x) paste0(year(x), "-Q", quarter(x)),
               limits = x_limits,
               expand = c(0, 0)) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1), 
                     limits = c(0, 1),
                     expand = expansion(mult = c(0, 0.05))) +
  scale_color_viridis_d(option = "plasma") +
  labs(
    title = "Over time",
    x = NULL,
    shape = NULL,
    y = "Projects with statement (%)"
  ) +
  theme_classic(base_size = 10) +
  theme(
    panel.grid = element_blank(),
    legend.position = "none",
    axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1, color = "black"),
    axis.text.y = element_text(color = "black"),
    axis.title.x = element_text(color = "black"),
    axis.title.y = element_text(color = "black"),
    axis.ticks = element_blank(),
    plot.title = element_text(color = "black"),
    plot.subtitle = element_text(color = "black")
  )

# Diagnostic figure (not in paper)
if (isFALSE(pnas)) {
  save_pnas_pdf(p.main, here("output", "pnas", "figures", sprintf("fig_statements_by_quarter_actor%s.pdf", analysis_paths$suffix)))
}

# Create combined figure
p.giving.out <- plots[[1]] + plots[[2]] + plots[[3]] + p.main +
  plot_annotation(tag_levels = "A",
                            tag_prefix = "", tag_suffix = "",
                            theme = theme(plot.tag = element_text(
                              face = "bold", family = "Arial", size = 10)))
# Only save for primary analysis (not affected by annotation robustness)
if (is.null(ROBUSTNESS_CHECK)) {
  save_pnas_pdf(
    p.giving.out,
    here("output", "pnas", "figures", "fig_4_statements.pdf"), 
    width = "double",
    height_cm = 13
  )
}

## Model of statement giving----

m.statements <- list()
m.statements$Company <- est_model(
  outcome = "gave_statement",
  varname = NULL,
  covar.list.in = covar.list[!grepl("^party$", covar.list)],
  data.in = subset(g, actor == "company_text"),
  se = "state",
  fe = NULL
  )
m.statements$Governor <- est_model(
  outcome = "gave_statement",
  varname = NULL,
  covar.list.in = covar.list,
  data.in = subset(g, actor == "governor_text"),
  se = "state",
  fe = NULL
  )
m.statements$Senator <- est_model(
  outcome = "gave_statement",
  varname = NULL,
  covar.list.in = covar.list,
  data.in = subset(g, actor %in% c("senator_1_text", "senator_2_text")),
  se = "state",
  fe = NULL
  )
m.statements$Rep <- est_model(
  outcome = "gave_statement",
  varname = NULL,
  covar.list.in = covar.list[!grepl("rep_party", covar.list)],
  data.in = subset(g, actor == "representative_text"),
  se = "state",
  fe = NULL
  )
m.statements$President <- est_model(
  outcome = "gave_statement",
  varname = NULL,
  covar.list.in = covar.list[!grepl("^party$", covar.list)],
  data.in = subset(g, actor == "biden_text"),
  se = "state",
  fe = NULL
  )

# Only generate the statements model table for the primary analysis
if (is.null(ROBUSTNESS_CHECK)) {
  statement.notes <- paste0(
    "\\textit{Notes:} Each column reports a separate linear probability model for a speaker. ",
    "The dependent variable equals 1 if the speaker issued a public project statement, 0 otherwise. ",
    "Unit of analysis is the project--actor pair. ",
    "Senators have higher observation counts (two per state). ",
    "Some covariates are missing for projects without announced locations. ",
    "Estimates are OLS with cluster-robust standard errors by state. ",
    "Continuous covariates are county-standardized. ",
    "Coefficients are percentage-point changes. ",
    "``Republican speaker'' indicates the party of the actor in each column header; ",
    "``Republican Governor'' and ``Republican Representative'' are contextual covariates capturing the party of these actors regardless of the speaker. ",
    "* $p<0.05$, ** $p<0.01$, *** $p<0.001$."
  )

  statement.caption <- "Linear probability models of statement giving, by speaker"

  if (is.null(ROBUSTNESS_CHECK)) {
    statement.file <- here("output", "pnas", "tables", "tab_S31_statements_model.tex")
  } else {
    statement.file <- here("output", "pnas", "tables", sprintf("tab_statements_model_%s.tex", ROBUSTNESS_CHECK))
  }

  modelsummary(
    m.statements,
    gof_omit = "IC|RMSE|Std|FE|Within",
    gof_map = gm,
    output = "tinytable",
    escape = FALSE,
    fmt = fmt_significant(2),
    vcov = ~ state,
    stars = c("*" = 0.05, "**" = 0.01, "***" = 0.001),
    coef_map = coefmap,
    notes = statement.notes,
    title = statement.caption,
    label = "tab:statements"
    ) %>%
    theme_latex(resize_width = .5, resize_direction = "both", placement = "H") %>%
    save_tt(output = statement.file, overwrite = TRUE)
}

## Check statements by jobs----

statements_by_jobs <- summarize_statements(g, c("actor", "target_jobs_bin"))

statements_by_jobs$actor <- factor(statements_by_jobs$actor, levels = c("Company", "Governor", "Biden", "Senator", "U.S. Rep"))

p.statements.by.jobs <- statements_by_jobs %>%
  ggplot(aes(x = actor, y = pct_projects_with_statement, fill = factor(target_jobs_bin))) +
  geom_col(position = "dodge") +
  scale_fill_viridis_d(name = "Target jobs", labels = c("Not specified", "Specified")) +
  geom_text(aes(label = paste0(round(pct_projects_with_statement * 100, 0), "%")), position = position_dodge(width = 0.9), vjust = -0.5, size = 3) +
  theme_classic(base_size = 10) +
  scale_y_continuous(expand = c(0, 0), labels = scales::percent_format(accuracy = 1), limits = c(0, 1.1)) +
  labs(x = "Speaker", y = "Projects with statement (%)") +
  theme(legend.position = "inside", legend.position.inside = c(.85, .85))

save_pnas_pdf(p.statements.by.jobs, here("output", "pnas", "figures", "fig_S15_statements_by_jobs.pdf"))

# Credit-claiming analysis----

## Plots----

### Overall----
p.credit.by.actor.prop <- g %>%
  filter(gives_credit == 1) %>%
  select(actor, credit_biden, credit_senate, credit_us_rep, credit_governor, credit_local, credit_ira, credit_bil) %>%
  pivot_longer(cols = c(credit_biden, credit_senate, credit_us_rep, credit_governor, credit_local, credit_ira, credit_bil), values_to = "gives_credit_to") %>%
  mutate(
    actor = standardize_actor(actor),
    name = case_when(
      name == "credit_biden" ~ "Biden Admin",
      name == "credit_local" ~ "Local",
      name == "credit_governor" ~ "Governor",
      name == "credit_us_rep" ~ "Rep",
      name == "credit_bil" ~ "BIL",
      name == "credit_ira" ~ "IRA",
      name == "credit_senate" ~ "Senator",
      TRUE ~ name
    )
  ) %>%
  group_by(actor, name) %>%
  summarize(prop = mean(gives_credit_to, na.rm = TRUE))

# Calculate key credit statistics
company.biden <- p.credit.by.actor.prop[which(p.credit.by.actor.prop$actor == "Company" & p.credit.by.actor.prop$name == "Biden Admin"), "prop"]
company.ira <- p.credit.by.actor.prop[which(p.credit.by.actor.prop$actor == "Company" & p.credit.by.actor.prop$name == "IRA"), "prop"]
biden.bil <- p.credit.by.actor.prop[which(p.credit.by.actor.prop$actor == "Biden" & p.credit.by.actor.prop$name == "BIL"), "prop"]
biden.ira <- p.credit.by.actor.prop[which(p.credit.by.actor.prop$actor == "Biden" & p.credit.by.actor.prop$name == "IRA"), "prop"]
senator.biden <- p.credit.by.actor.prop[which(p.credit.by.actor.prop$actor == "Senator" & p.credit.by.actor.prop$name == "Biden Admin"), "prop"]
rep.biden <- p.credit.by.actor.prop[which(p.credit.by.actor.prop$actor == "U.S. Rep" & p.credit.by.actor.prop$name == "Biden Admin"), "prop"]

# Save credit statistics for primary analysis only
if (is.null(ROBUSTNESS_CHECK)) {
  writeLines(as.character(round(company.biden * 100, 0)), "output/pnas/stats/company_credit_biden.txt")
  writeLines(as.character(round(company.ira * 100, 0)), "output/pnas/stats/company_credit_ira.txt")
  writeLines(as.character(round(biden.bil * 100, 0)), "output/pnas/stats/biden_credit_bil.txt")
  writeLines(as.character(round(biden.ira * 100, 0)), "output/pnas/stats/biden_credit_ira.txt")
  writeLines(as.character(round(senator.biden * 100, 0)), "output/pnas/stats/senator_credit_biden.txt")
  writeLines(as.character(round(rep.biden * 100, 0)), "output/pnas/stats/rep_credit_biden.txt")
}

# Credit by actor bar plot (diagnostic, not in paper)
if (isFALSE(pnas)) {
  p.credit.by.actor <- p.credit.by.actor.prop %>%
    ggplot(aes(x = name, y = prop, fill = actor)) +
    geom_col(position = "dodge") +
    scale_fill_viridis_d() +
    theme_classic(base_size = 10) +
    scale_y_continuous(expand = c(0, 0), labels = scales::percent_format(accuracy = 1), limits = c(0, 1.1)) +
    labs(x = "Credit Recipient", y = "Share of projects where credited by speaker (%)", fill = "Speaker") +
    theme(
      legend.position = "right"
    )
  save_pnas_pdf(p.credit.by.actor, here("output", "pnas", "figures", sprintf("fig_credit_by_actor%s.pdf", analysis_paths$suffix)))
}

### Heatmap----
heatmap_data <- g %>%
  filter(gives_credit == 1) %>%
  select(actor, credit_biden, credit_senate, credit_us_rep, credit_governor, credit_local, credit_ira, credit_bil) %>%
  # Standardize speaker (actor) names
  mutate(
    speaker = case_when(
      actor == "senator_1_text" | actor == "senator_2_text" ~ "Senator",
      actor == "representative_text" ~ "U.S. Rep",
      actor == "governor_text" ~ "Governor", 
      actor == "company_text" ~ "Company",
      actor == "biden_text" ~ "Biden",
      TRUE ~ actor
    )
  ) %>%
  # Reshape to long format for recipients
  pivot_longer(
    cols = c(credit_biden, credit_senate, credit_us_rep, credit_governor, credit_local, credit_ira, credit_bil),
    names_to = "recipient_var",
    values_to = "gives_credit_to"
  ) %>%
  # Standardize recipient names
  mutate(
    recipient = case_when(
      recipient_var == "credit_biden" ~ "Biden",
      recipient_var == "credit_senate" ~ "Senator", 
      recipient_var == "credit_us_rep" ~ "U.S. Rep",
      recipient_var == "credit_governor" ~ "Governor",
      recipient_var == "credit_local" ~ "Local",
      recipient_var == "credit_ira" ~ "IRA",
      recipient_var == "credit_bil" ~ "BIL",
      TRUE ~ recipient_var
    )
  ) %>%
  # Calculate proportion of credit given by each speaker to each recipient
  group_by(speaker, recipient) %>%
  summarize(
    prop_credit = mean(gives_credit_to, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  # Order speakers and recipients for better visualization
  mutate(
    speaker = factor(speaker, levels = c("Company", "Biden", "Governor", "Senator", "U.S. Rep")),
    recipient = factor(recipient, levels = c("Biden", "Governor", "Senator", "U.S. Rep", "Local", "IRA", "BIL"))
  )

credit.median <- median(heatmap_data$prop_credit)

# Create visualization
p_heatmap <- heatmap_data %>%
  ggplot(aes(x = speaker, y = recipient)) +
  geom_point(
    aes(size = prop_credit, fill = prop_credit > credit.median),
    color = "black",
    shape = 21,
    alpha = 0.9,
    stroke = 0.3
  ) +
  scale_fill_manual(
    values = c("FALSE" = "grey60", "TRUE" = "grey20"),
    guide = "none"
  ) +
  geom_text(
    aes(
      label = ifelse(prop_credit > credit.median, paste0(round(prop_credit * 100, 0), "%"), ""),
      color = prop_credit > credit.median
    ),
    fontface = "bold",
    size = 1.75
  ) +
  scale_color_manual(
    values = c("FALSE" = "black", "TRUE" = "white"),
    guide = "none"
  ) +
  scale_size_continuous(
    name = "% of projects in\nwhich a speaker\ncredits recipient",
    labels = scales::percent_format(accuracy = 1),
    range = c(1, 12),
    limits = c(0, NA),
    breaks = c(0.2, 0.4, 0.6, 0.8)
  ) +
  scale_x_discrete(expand = c(0.2, 0.2)) +
  scale_y_discrete(expand = c(0.2, 0.2)) +
  labs(
    title = NULL,
    x = "Speaker",
    y = "Credit Recipient",
    caption = NULL
  ) +
  theme_minimal(base_size = 10) +
  theme(
    panel.grid = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1, color = "black"),
    axis.text.y = element_text(color = "black"),
    axis.title.x = element_text(color = "black"),
    axis.title.y = element_text(color = "black"),
    legend.position = "bottom",
    legend.direction = "horizontal",
    legend.box.just = "left",
    plot.title.position = "plot",
    legend.title = element_text(size = 8),
    legend.text = element_text(size = 7)
  )

# Heatmap standalone (diagnostic, not in paper - included in composite figure)
if (isFALSE(pnas)) {
  save_pnas_pdf(p_heatmap, here("output", "pnas", "figures", sprintf("fig_credit_heatmap%s.pdf", analysis_paths$suffix)), height = 8)
}

### General network----
# Prepare network data focusing on actor-to-actor credit relationships
network_data <- g %>%
  filter(gives_credit == 1) %>%
  select(actor, credit_biden, credit_ira, credit_senate, 
  credit_us_rep, credit_governor, credit_local) %>%
  # Standardize actor names
  mutate(
    from_actor = standardize_actor(actor),
    from_actor = case_when(
      from_actor == "U.S. Senator" ~ "Senator",
      from_actor == "U.S. Rep" ~ "U.S. Rep.",
      from_actor == "Biden Admin" ~ "Biden",
      TRUE ~ from_actor
    )
  ) %>%
  # Reshape to create edges
  pivot_longer(
    cols = c(credit_biden, credit_ira, credit_senate, credit_us_rep, credit_governor, credit_local),
    names_to = "to_credit", 
    values_to = "gives_credit_to"
  ) %>%
  filter(gives_credit_to == 1) %>%
  # Map credit variables to refined actor types
  mutate(
    to_actor = case_when(
      to_credit %in% c("credit_biden", "credit_ira") ~ "Biden",
      to_credit == "credit_senate" ~ "Senator", 
      to_credit == "credit_governor" ~ "Governor",
      to_credit == "credit_local" ~ "Local",
      to_credit == "credit_us_rep" ~ "U.S. Rep.",
      TRUE ~ to_credit
    )
  ) %>%
  # Remove self-credit within broad categories
  filter(!(from_actor == "Senator" & to_actor == "Senator")) %>%
  filter(!(from_actor == "U.S. Rep." & to_actor == "U.S. Rep.")) %>%
  filter(!(from_actor == "Governor" & to_actor == "Governor")) %>%
  filter(!(from_actor == "Biden" & to_actor == "Biden")) %>%
  # Count credit relationships
  group_by(from_actor, to_actor) %>%
  summarise(
    weight = n(),
    .groups = "drop"
  ) %>%
  # Filter to meaningful relationships (at least 2 instances)
  filter(weight >= 2) %>%
  # Keep original column names for network
  select(from = from_actor, to = to_actor, weight)

# Create nodes data frame with refined categories
nodes <- data.frame(
  name = unique(c(network_data$from, network_data$to)),
  stringsAsFactors = FALSE
) %>%
  mutate(
    node_type = case_when(
      name == "Company" ~ "Company",
      name == "Biden" ~ "Biden",
      name == "Senator" ~ "Senator",
      name == "U.S. Rep." ~ "U.S. Rep.", 
      name == "Governor" ~ "Governor",
      name == "Local" ~ "Local",
      TRUE ~ "Other"
    )
  )

# Create igraph object
credit_network <- graph_from_data_frame(
  d = network_data, 
  vertices = nodes, 
  directed = TRUE
)

# Create network plot using force-directed layout with manual seed
set.seed(42)
fr_layout <- layout_with_fr(credit_network, niter = 500)
manual_layout <- data.frame(
  name = V(credit_network)$name,
  x = fr_layout[,1],
  y = fr_layout[,2]
)

p_network <- ggraph(credit_network, layout = manual_layout) +
  geom_edge_fan(
    aes(width = weight, alpha = weight),
    arrow = arrow(type = "closed", length = unit(1.5, "mm")),
    start_cap = circle(8, "mm"), 
    end_cap = circle(8, "mm"),
    color = "#7f8c8d",
    strength = 2
  ) +
  geom_node_point(
    color = "black", 
    size = 15,
    alpha = 0.9
  ) +
  geom_node_text(
    aes(label = name), 
    size = 2.4, 
    fontface = "bold", 
    color = "white",
    family = "Helvetica"
  ) +
  scale_edge_width_continuous(
    range = c(0.25, 1.5), 
    name = "Credit Instances",
    guide = guide_legend(override.aes = list(alpha = 1))
  ) +
  scale_edge_alpha_continuous(
    range = c(1), 
    guide = "none"
  ) +
  theme_graph(base_family = "Arial", base_size = 10) +
  theme(
    legend.position = "bottom",
    legend.margin = margin(0, 0, 0, 0, "pt"),
    legend.box.margin = margin(0, 0, 0, 0, "pt"),
    plot.margin = margin(5, 5, 5, 5, "pt")
  ) +
  expand_limits(x = c(min(manual_layout$x) - 0.03, max(manual_layout$x) + 0.03),
                y = c(min(manual_layout$y) - 0.03, max(manual_layout$y) + 0.03))

# Diagnostic figure (not in paper)
if (isFALSE(pnas)) {
  save_pnas_pdf(p_network, here("output", "pnas", "figures", sprintf("fig_credit_network%s.pdf", analysis_paths$suffix)), height = 6)
}

# Print network summary statistics
message("Network Summary:")
message(paste("Nodes:", vcount(credit_network)))
message(paste("Edges:", ecount(credit_network)))
message(paste("Network density:", round(edge_density(credit_network), 3)))

# Print top credit relationships
message("Top credit relationships:")
top_relationships <- network_data %>%
  arrange(desc(weight)) %>%
  head(10) %>%
  mutate(relationship = paste(from, "→", to, "(", weight, "times)"))
for (rel in top_relationships$relationship) {
  message(paste(" ", rel))
}

### Partisan network----
# Prepare partisan network data with party information
partisan_network_data <- g %>%
  filter(gives_credit == 1) %>%
  filter(!is.na(party)) %>%  # Only include actors with party info (excludes companies)
  select(actor, party, credit_biden, credit_ira, credit_senate, credit_us_rep, credit_governor, credit_local) %>%
  # Standardize actor names and add party info
  mutate(
    from_actor = standardize_actor(actor),
    from_actor = case_when(
      from_actor == "U.S. Senator" ~ "Senator",
      from_actor == "U.S. Rep" ~ "U.S. Rep.",
      from_actor == "Biden Admin" ~ "Biden",
      TRUE ~ from_actor
    ),
    from_party = case_when(
      party == "D" ~ "Democratic",
      party == "R" ~ "Republican",
      TRUE ~ "Other"
    )
  ) %>%
  # Reshape to create edges
  pivot_longer(
    cols = c(credit_biden, credit_ira, credit_senate, credit_us_rep, credit_governor, credit_local),
    names_to = "to_credit", 
    values_to = "gives_credit_to"
  ) %>%
  filter(gives_credit_to == 1) %>%
  # Map credit variables to refined actor types
  mutate(
    to_actor = case_when(
      to_credit %in% c("credit_biden", "credit_ira") ~ "Biden",
      to_credit == "credit_senate" ~ "Senator", 
      to_credit == "credit_us_rep" ~ "U.S. Rep.",
      to_credit == "credit_governor" ~ "Governor",
      to_credit == "credit_local" ~ "Local",
      TRUE ~ to_credit
    )
  ) %>%
  # Remove self-credit within broad categories
  filter(!(from_actor == "Senator" & to_actor == "Senator")) %>%
  filter(!(from_actor == "U.S. Rep." & to_actor == "U.S. Rep.")) %>%
  filter(!(from_actor == "Governor" & to_actor == "Governor")) %>%
  # Count credit relationships by party
  group_by(from_actor, to_actor, from_party) %>%
  summarise(
    weight = n(),
    .groups = "drop"
  ) %>%
  # Filter to meaningful relationships (at least 2 instances)
  filter(weight >= 2) %>%
  # Keep original column names for network
  select(from = from_actor, to = to_actor, weight, from_party)

# Create partisan nodes data frame
partisan_nodes <- data.frame(
  name = unique(c(partisan_network_data$from, partisan_network_data$to)),
  stringsAsFactors = FALSE
) %>%
  mutate(
    node_type = case_when(
      name == "Biden" ~ "Biden",
      name == "Senator" ~ "Senator",
      name == "U.S. Rep." ~ "U.S. Rep.", 
      name == "Governor" ~ "Governor",
      name == "Local" ~ "Local",
      TRUE ~ "Other"
    )
  )

# Create partisan igraph object
partisan_credit_network <- graph_from_data_frame(
  d = partisan_network_data, 
  vertices = partisan_nodes, 
  directed = TRUE
)

# Create a layout matrix that matches the partisan network nodes with positions from the main network
partisan_node_names <- V(partisan_credit_network)$name
partisan_layout_df <- manual_layout %>%
  filter(name %in% partisan_node_names) %>%
  # Ensure we have all the nodes in the right order
  arrange(match(name, partisan_node_names))

# Convert to matrix format for ggraph
partisan_manual_layout <- as.matrix(partisan_layout_df[, c("x", "y")])

p_partisan_network <- ggraph(partisan_credit_network, layout = partisan_manual_layout) +
  geom_edge_fan(
    aes(width = weight, alpha = weight, color = from_party),
    arrow = arrow(type = "closed", length = unit(1.5, "mm")),
    start_cap = circle(8, "mm"), 
    end_cap = circle(8, "mm"),
    strength = 2
  ) +
  geom_node_point(
    color = "black", 
    size = 15,
    alpha = 0.9
  ) +
  geom_node_text(
    aes(label = name), 
    size = 2.4, 
    fontface = "bold", 
    color = "white",
    family = "Helvetica"
  ) +
  scale_edge_width_continuous(
    range = c(0.25, 1.5), 
    name = "Credit Instances",
    guide = guide_legend(override.aes = list(alpha = 1))
  ) +
  scale_edge_alpha_continuous(
    range = c(1), 
    guide = "none"
  ) +
  scale_edge_color_manual(
    values = c(
      "Democratic" = party_colors$Democrat,
      "Republican" = party_colors$Republican
    ),
    labels = c("Democratic" = "D", "Republican" = "R"),
    name = "Speaker Party"
  ) +
  theme_graph(base_family = "Arial", base_size = 10) +
  theme(
    legend.position = "bottom",
    legend.box = "vertical",
    legend.margin = margin(0, 0, 0, 0, "pt"),
    legend.box.margin = margin(0, 0, 0, 0, "pt"),
    plot.margin = margin(5, 5, 5, 5, "pt")
  ) +
  expand_limits(x = c(min(partisan_manual_layout[,1]) - 0.03, max(partisan_manual_layout[,1]) + 0.03),
                y = c(min(partisan_manual_layout[,2]) - 0.03, max(partisan_manual_layout[,2]) + 0.03))
if (isFALSE(pnas)) {
  save_pnas_pdf(p_partisan_network, here("output", "pnas", "figures", sprintf("fig_credit_network_partisan%s.pdf", analysis_paths$suffix)), height = 6)
}

# Print partisan network summary
message("\nPartisan Network Summary:")
message(paste("Nodes:", vcount(partisan_credit_network)))
message(paste("Edges:", ecount(partisan_credit_network)))

# Summary by party
party_summary <- partisan_network_data %>%
  group_by(from_party) %>%
  summarise(
    total_credits = sum(weight),
    unique_relationships = n(),
    .groups = "drop"
  ) %>%
  arrange(desc(total_credits))

message("Credit giving by party:")
for (i in 1:nrow(party_summary)) {
  message(paste(" ", party_summary$from_party[i], ":", party_summary$total_credits[i], "total credits,", 
                party_summary$unique_relationships[i], "unique relationships"))
}

### Correlates of credit-claiming----

# Calculate total statements by actor for adding to tables
total_statements_by_actor <- g %>%
  filter(gave_statement == 1) %>%
  mutate(actor_label = case_when(
    actor == "company_text" ~ "Company",
    actor == "governor_text" ~ "Governor",
    actor %in% c("senator_1_text", "senator_2_text") ~ "Senator",
    actor == "representative_text" ~ "Rep",
    actor == "biden_text" ~ "President",
    TRUE ~ actor
  )) %>%
  group_by(actor_label) %>%
  summarise(n = n(), .groups = "drop")

# Create add_rows dataframe for modelsummary (must match model column order)
total_statements_row <- data.frame(
  term = "Total statements",
  Company = total_statements_by_actor$n[total_statements_by_actor$actor_label == "Company"],
  Governor = total_statements_by_actor$n[total_statements_by_actor$actor_label == "Governor"],
  Senator = total_statements_by_actor$n[total_statements_by_actor$actor_label == "Senator"],
  Rep = total_statements_by_actor$n[total_statements_by_actor$actor_label == "Rep"],
  President = total_statements_by_actor$n[total_statements_by_actor$actor_label == "President"]
)

m.biden_ira <- list()
m.biden_ira$Company <- est_model(
  outcome = "credit_biden_ira",
  varname = NULL,
  covar.list.in = covar.list[!grepl("^party$", covar.list)],
  data.in = subset(g, actor == "company_text"),
  se = "state",
  fe = NULL
  )
m.biden_ira$Governor <- est_model(
  outcome = "credit_biden_ira",
  varname = NULL,
  covar.list.in = covar.list[!grepl("gov_party", covar.list)],
  data.in = subset(g, actor == "governor_text"),
  se = "state",
  fe = NULL
  )
m.biden_ira$Senator <- est_model(
  outcome = "credit_biden_ira",
  varname = NULL,
  covar.list.in = covar.list,
  data.in = subset(g, actor %in% c("senator_1_text", "senator_2_text")),
  se = "state",
  fe = NULL
  )
m.biden_ira$Rep <- est_model(
  outcome = "credit_biden_ira",
  varname = NULL,
  covar.list.in = covar.list[!grepl("rep_party", covar.list)],
  data.in = subset(g, actor == "representative_text"),
  se = "state",
  fe = NULL
  )
m.biden_ira$President <- est_model(
  outcome = "credit_biden_ira",
  varname = NULL,
  covar.list.in = covar.list[!grepl("^party$", covar.list)],
  data.in = subset(g, actor == "biden_text"),
  se = "state",
  fe = NULL
  )

# Create table
credit.notes <- paste0(
  "\\textit{Notes:} Each column reports a separate linear probability model for a speaker, ",
  "conditional on the speaker making a project-related statement. ",
  "The dependent variable equals 1 if the speaker credited the Biden Administration or IRA, 0 otherwise. ",
  "Unit of analysis is the project--actor pair. ",
  "$N$ reflects observations with non-missing covariates; Total statements shows all statements by actor. ",
  "Estimates are OLS with cluster-robust standard errors by state. ",
  "Continuous covariates are county-standardized. ",
  "Coefficients are percentage-point changes. ",
  "* $p<0.05$, ** $p<0.01$, *** $p<0.001$."
)

credit.caption <- "Linear probability models of Biden/IRA credit, by speaker"

# Skip robustness version of this table in PNAS mode
if (!skip_robustness_tables) {
  credit.biden.file <- here("output", "pnas", "tables", sprintf("tab_S32_credit_biden_model%s.tex", analysis_paths$suffix))

  modelsummary(
    m.biden_ira,
    gof_omit = "IC|RMSE|Std|FE|Within",
    gof_map = gm,
    output = "tinytable",
    escape = FALSE,
    fmt = fmt_significant(2),
    vcov = ~ state,
    stars = c("*" = 0.05, "**" = 0.01, "***" = 0.001),
    coef_map = coefmap,
    add_rows = total_statements_row,
    notes = credit.notes,
    title = credit.caption,
    label = "tab:credit_biden"
    ) %>%
    group_tt(
      j = list("Outcome: Credited Biden/IRA (=1)" = 2:6)
      ) %>%
    theme_latex(resize_width = .5, resize_direction = "both", placement = "H") %>%
    save_tt(output = credit.biden.file, overwrite = TRUE)
}

# Credit correlates coefficient plot (diagnostic, not in paper)
if (isFALSE(pnas)) {
  credit_model_df <- modelplot(m.biden_ira, draw = FALSE, vcov = ~ state)
  p.credit.correlates <- credit_model_df %>%
    filter(grepl("^partyr|year|dem|swing", term, ignore.case = TRUE)) %>%
    mutate(
      term = case_when(
        term == "factor(year)2023" ~ "2023 (vs. 2022)",
        term == "factor(year)2024" ~ "2024 (vs. 2022)",
        term == "partyR" ~ "Republican",
        term == "demshare_major_z" ~ "2020 Biden vote share",
        term == "swingstate" ~ "Swing state",
        TRUE ~ term
      )
    ) %>%
    ggplot(aes(y = term, x = estimate, xmin = conf.low, xmax = conf.high, 
    color = model, shape = model)) +
    geom_vline(xintercept = 0, color = "grey") +
    geom_pointrange(position = position_dodge(width = 0.5)) +
    theme_classic(base_size = 10) +
    labs(x = "Estimated effect on likelihood of crediting Biden/IRA\n(coefficients from linear regression, 95% CI)", y = NULL, 
    color = NULL, shape = NULL) +
    scale_colour_viridis_d() +
    theme(
      legend.position = "bottom"
    )
  save_pnas_pdf(p.credit.correlates, here("output", "pnas", "figures", sprintf("fig_credit_correlates%s.pdf", analysis_paths$suffix)), height = 6)
}

#### Create composite figure-----
p.credit.out <- p_heatmap + p_partisan_network + 
  plot_layout(ncol = 2) +
  plot_annotation(tag_levels = "A")

save_pnas_pdf(
  p.credit.out,
  here("output", "pnas", "figures", sprintf("fig_%s_credit_panel%s.pdf", analysis_paths$credit_panel_fig, analysis_paths$suffix)),
  width = "double",
  height = 9
)

## Model of credit giving----
m.gov <- list()
m.gov$Company <- est_model(
  outcome = "credit_governor",
  varname = NULL,
  covar.list.in = covar.list[!grepl("^party$", covar.list)],
  data.in = subset(g, actor == "company_text"),
  se = "state",
  fe = NULL
  )
m.gov$Governor <- est_model(
  outcome = "credit_governor",
  varname = NULL,
  covar.list.in = covar.list[!grepl("gov_party", covar.list)],
  data.in = subset(g, actor == "governor_text"),
  se = "state",
  fe = NULL
  )
m.gov$Senator <- est_model(
  outcome = "credit_governor",
  varname = NULL,
  covar.list.in = covar.list,
  data.in = subset(g, actor %in% c("senator_1_text", "senator_2_text")),
  se = "state",
  fe = NULL
  )
m.gov$Rep <- est_model(
  outcome = "credit_governor",
  varname = NULL,
  covar.list.in = covar.list[!grepl("rep_party", covar.list)],
  data.in = subset(g, actor == "representative_text"),
  se = "state",
  fe = NULL
  )
m.gov$President <- est_model(
  outcome = "credit_governor",
  varname = NULL,
  covar.list.in = covar.list[!grepl("^party$", covar.list)],
  data.in = subset(g, actor == "biden_text"),
  se = "state",
  fe = NULL
  )

credit.gov.notes <- paste0(
  "\\textit{Notes:} Each column reports a separate linear probability model for a speaker, ",
  "conditional on the speaker making a project-related statement. ",
  "The dependent variable equals 1 if the speaker credited the Governor, 0 otherwise. ",
  "Unit of analysis is the project--actor pair. ",
  "$N$ reflects observations with non-missing covariates; Total statements shows all statements by actor. ",
  "Estimates are OLS with cluster-robust standard errors by state. ",
  "Continuous covariates are county-standardized. ",
  "Coefficients are percentage-point changes. ",
  "* $p<0.05$, ** $p<0.01$, *** $p<0.001$."
)

credit.gov.caption <- "Linear probability models of Governor credit, by speaker"

# Skip robustness version of this table in PNAS mode
if (!skip_robustness_tables) {
  credit.gov.file <- here("output", "pnas", "tables", sprintf("tab_S33_credit_gov_model%s.tex", analysis_paths$suffix))

  modelsummary(
    m.gov,
    gof_omit = "IC|RMSE|Std|FE|Within",
    gof_map = gm,
    output = "tinytable",
    escape = FALSE,
    fmt = fmt_significant(2),
    vcov = ~ state,
    stars = c("*" = 0.05, "**" = 0.01, "***" = 0.001),
    coef_map = coefmap,
    add_rows = total_statements_row,
    notes = credit.gov.notes,
    title = credit.gov.caption,
    label = "tab:credit_gov"
    ) %>%
    group_tt(
      j = list("Outcome: Credited Governor (=1)" = 2:6)
      ) %>%
    theme_latex(resize_width = .53, resize_direction = "both", placement = "H") %>%
    save_tt(output = credit.gov.file, overwrite = TRUE)
}

# Senator
m.senate <- list()
m.senate$Company <- est_model(
  outcome = "credit_senate",
  varname = NULL,
  covar.list.in = covar.list[!grepl("^party$", covar.list)],
  data.in = subset(g, actor == "company_text"),
  se = "state",
  fe = NULL
  )
m.senate$Governor <- est_model(
  outcome = "credit_senate",
  varname = NULL,
  covar.list.in = covar.list[!grepl("gov_party", covar.list)],
  data.in = subset(g, actor == "governor_text"),
  se = "state",
  fe = NULL
  )
m.senate$Senator <- est_model(
  outcome = "credit_senate",
  varname = NULL,
  covar.list.in = covar.list,
  data.in = subset(g, actor %in% c("senator_1_text", "senator_2_text")),
  se = "state",
  fe = NULL
  )
m.senate$Rep <- est_model(
  outcome = "credit_senate",
  varname = NULL,
  covar.list.in = covar.list[!grepl("rep_party", covar.list)],
  data.in = subset(g, actor == "representative_text"),
  se = "state",
  fe = NULL
  )
m.senate$President <- est_model(
  outcome = "credit_senate",
  varname = NULL,
  covar.list.in = covar.list[!grepl("^party$", covar.list)],
  data.in = subset(g, actor == "biden_text"),
  se = "state",
  fe = NULL
  )

credit.senate.notes <- paste0(
  "\\textit{Notes:} Each column reports a separate linear probability model for a speaker, ",
  "conditional on the speaker making a project-related statement. ",
  "The dependent variable equals 1 if the speaker credited the U.S. Senator, 0 otherwise. ",
  "Unit of analysis is the project--actor pair. ",
  "$N$ reflects observations with non-missing covariates; Total statements shows all statements by actor. ",
  "Estimates are OLS with cluster-robust standard errors by state. ",
  "Continuous covariates are county-standardized. ",
  "Coefficients are percentage-point changes. ",
  "* $p<0.05$, ** $p<0.01$, *** $p<0.001$."
)

credit.senate.caption <- "Linear probability models of Senator credit, by speaker"

# Skip robustness version of this table in PNAS mode
if (!skip_robustness_tables) {
  credit.senate.file <- here("output", "pnas", "tables", sprintf("tab_S34_credit_senate_model%s.tex", analysis_paths$suffix))

  modelsummary(
    m.senate,
    gof_omit = "IC|RMSE|Std|FE|Within",
    gof_map = gm,
    output = "tinytable",
    escape = FALSE,
    fmt = fmt_significant(2),
    vcov = ~ state,
    stars = c("*" = 0.05, "**" = 0.01, "***" = 0.001),
    coef_map = coefmap,
    add_rows = total_statements_row,
    notes = credit.senate.notes,
    title = credit.senate.caption,
    label = "tab:credit_senate"
    ) %>%
    group_tt(
      j = list("Outcome: Credited Senator (=1)" = 2:6)
      ) %>%
    theme_latex(resize_width = .55, resize_direction = "both", placement = "H") %>%
    save_tt(output = credit.senate.file, overwrite = TRUE)
}

# Representative
m.rep <- list()
m.rep$Company <- est_model(
  outcome = "credit_us_rep",
  varname = NULL,
  covar.list.in = covar.list[!grepl("^party$", covar.list)],
  data.in = subset(g, actor == "company_text"),
  se = "state",
  fe = NULL
  )
m.rep$Governor <- est_model(
  outcome = "credit_us_rep",
  varname = NULL,
  covar.list.in = covar.list[!grepl("gov_party", covar.list)],
  data.in = subset(g, actor == "governor_text"),
  se = "state",
  fe = NULL
  )
m.rep$Senator <- est_model(
  outcome = "credit_us_rep",
  varname = NULL,
  covar.list.in = covar.list,
  data.in = subset(g, actor %in% c("senator_1_text", "senator_2_text")),
  se = "state",
  fe = NULL
  )
m.rep$Rep <- est_model(
  outcome = "credit_us_rep",
  varname = NULL,
  covar.list.in = covar.list[!grepl("rep_party", covar.list)],
  data.in = subset(g, actor == "representative_text"),
  se = "state",
  fe = NULL
  )
m.rep$President <- est_model(
  outcome = "credit_us_rep",
  varname = NULL, 
  covar.list.in = covar.list[!grepl("^party$", covar.list)],
  data.in = subset(g, actor == "biden_text"),
  se = "state",
  fe = NULL
  )

credit.rep.notes <- paste0(
  "\\textit{Notes:} Each column reports a separate linear probability model for a speaker, ",
  "conditional on the speaker making a project-related statement. ",
  "The dependent variable equals 1 if the speaker credited the U.S. Representative, 0 otherwise. ",
  "Unit of analysis is the project--actor pair. ",
  "$N$ reflects observations with non-missing covariates; Total statements shows all statements by actor. ",
  "Estimates are OLS with cluster-robust standard errors by state. ",
  "Continuous covariates are county-standardized. ",
  "Coefficients are percentage-point changes. ",
  "* $p<0.05$, ** $p<0.01$, *** $p<0.001$."
)

credit.rep.caption <- "Linear probability models of Representative credit, by speaker"

# Skip robustness version of this table in PNAS mode
if (!skip_robustness_tables) {
  credit.rep.file <- here("output", "pnas", "tables", sprintf("tab_S35_credit_rep_model%s.tex", analysis_paths$suffix))

  modelsummary(
    m.rep,
    gof_omit = "IC|RMSE|Std|FE|Within",
    gof_map = gm,
    output = "tinytable",
    escape = FALSE,
    fmt = fmt_significant(2),
    vcov = ~ state,
    stars = c("*" = 0.05, "**" = 0.01, "***" = 0.001),
    coef_map = coefmap,
    add_rows = total_statements_row,
    notes = credit.rep.notes,
    title = credit.rep.caption,
    label = "tab:credit_rep"
    ) %>%
    group_tt(
      j = list("Outcome: Credited Representative (=1)" = 2:6)
      ) %>%
    theme_latex(resize_width = .54, resize_direction = "both", placement = "H") %>%
    save_tt(output = credit.rep.file, overwrite = TRUE)
}

# Analysis complete
message(sprintf("\n=== %s STATEMENTS ANALYSIS COMPLETE ===", toupper(analysis_paths$description)))
message(sprintf("Processed %d observations from %d unique projects", nrow(g), n_distinct(g$id)))
if (!is.null(ROBUSTNESS_CHECK)) {
  message(sprintf("Robustness check outputs saved with suffix: %s", analysis_paths$suffix))
  message("Compare with primary analysis results to assess robustness")
}

# DSL Bias-Adjusted Regression Analysis ----
library(dsl)

# Load human annotations
h <- read_csv(here("data", "input", "credit", "annotation_qc", "aditya_qc.csv"),
              show_col_types = FALSE)

# Rename human-annotated columns with _h suffix
qc <- h %>%
  rename(
    gives_credit_h = gives_credit...14,
    credit_biden_h = credit_biden,
    credit_senate_h = credit_senate,
    credit_governor_h = credit_governor
  ) %>%
  select(id, statement, gives_credit_h, credit_biden_h, credit_senate_h, credit_governor_h) %>%
  mutate(id = as.character(id))

# Join statements and annotations
qc_dsl <- g %>%
  mutate(id = as.character(id)) %>%
  left_join(qc, by = c("id", "statement"))
qc_dsl <- subset(qc_dsl, gave_statement == 1 & gives_credit == 1)

# Diagnostic: check how many human annotations we have
message("\n=== DSL Data Diagnostics ===")
message(sprintf("Total observations with statements giving credit: %d", nrow(qc_dsl)))
message(sprintf("Observations with human credit_biden annotation: %d", sum(!is.na(qc_dsl$credit_biden_h))))
message(sprintf("Observations with human credit_senate annotation: %d", sum(!is.na(qc_dsl$credit_senate_h))))
message(sprintf("Observations with human credit_governor annotation: %d", sum(!is.na(qc_dsl$credit_governor_h))))

# Check by actor
message("\nObservations by actor:")
print(table(qc_dsl$actor))

message("\nHuman annotations by actor:")
qc_dsl %>%
  group_by(actor) %>%
  summarise(
    n = n(),
    n_biden_h = sum(!is.na(credit_biden_h)),
    n_senate_h = sum(!is.na(credit_senate_h)),
    n_governor_h = sum(!is.na(credit_governor_h)),
    .groups = "drop"
  ) %>%
  print()

# Define speakers and their covariate exclusions
speakers <- list(
  Company = list(actor = "company_text", exclude = "^party$"),
  Governor = list(actor = "governor_text", exclude = "gov_party"),
  Senator = list(actor = c("senator_1_text", "senator_2_text"), exclude = NULL),
  Rep = list(actor = "representative_text", exclude = "rep_party"),
  President = list(actor = "biden_text", exclude = "^party$")
)

# Define credit outcomes with human annotations available
credit_outcomes <- c("credit_biden", "credit_senate", "credit_governor")

# Function to run DSL model for a speaker-outcome pair
run_dsl_model <- function(speaker_name, speaker_info, outcome, data, covar.list) {
  # Filter data for this speaker
  speaker_data <- subset(data, actor %in% speaker_info$actor)
  
  # Skip if no observations
  if (nrow(speaker_data) == 0) {
    message(sprintf("  Skipping %s - %s: no observations", speaker_name, outcome))
    return(NULL)
  }
  
  # Create covariate list excluding speaker-specific variables
  if (!is.null(speaker_info$exclude)) {
    covars <- covar.list[!grepl(speaker_info$exclude, covar.list)]
  } else {
    covars <- covar.list
  }
  
  # Define human annotation variable
  human_var <- paste0(outcome, "_h")
  
  # Count human annotations for this subset
  n_human <- sum(!is.na(speaker_data[[human_var]]))
  message(sprintf("  %s - %s: %d obs, %d human annotations", speaker_name, outcome, nrow(speaker_data), n_human))
  
  # Check if we have human annotations for this subset
  if (n_human < 5) {
    message(sprintf("    -> Skipping: insufficient human annotations (need >= 5)"))
    return(NULL)
  }
  
  # Build formula - use simpler model if sample is small
  if (nrow(speaker_data) < 50) {
    # Use intercept-only model for small samples
    formula_str <- paste0(outcome, " ~ 1")
    message(sprintf("    -> Using intercept-only model (small sample)"))
  } else {
    formula_str <- paste0(outcome, " ~ ", paste(covars, collapse = " + "))
  }
  
  tryCatch({
out <- dsl(
  model = "lm",
      formula = as.formula(formula_str),
      predicted_var = human_var,
      prediction = outcome,
      data = speaker_data
    )
    message(sprintf("    -> Success"))
    
    return(out)
  }, error = function(e) {
    message(sprintf("    -> Error: %s", e$message))
    return(NULL)
  })
}

# Run all DSL models
message("\n=== Running DSL Bias-Adjusted Regressions ===")
dsl_results <- list()

for (outcome in credit_outcomes) {
  message(sprintf("\nOutcome: %s", outcome))
  dsl_results[[outcome]] <- list()
  
  for (speaker_name in names(speakers)) {
    message(sprintf("  Speaker: %s", speaker_name))
    result <- run_dsl_model(
      speaker_name = speaker_name,
      speaker_info = speakers[[speaker_name]],
      outcome = outcome,
      data = qc_dsl,
      covar.list = covar.list
    )
    if (!is.null(result)) {
      dsl_results[[outcome]][[speaker_name]] <- result
    }
  }
}

# Create LaTeX tables for DSL results
message("\n=== Creating LaTeX Tables for DSL Results ===")

# Function to create a properly formatted regression table for one outcome
create_dsl_latex_table <- function(outcome, results_list, output_file, coefmap) {
  if (length(results_list) == 0) {
    message(sprintf("  No results for %s", outcome))
    return(NULL)
  }
  
  # Extract all coefficients from each speaker into a list of data frames
  coef_list <- list()
  n_list <- list()
  
  for (speaker_name in names(results_list)) {
    dsl_obj <- results_list[[speaker_name]]
    if (is.null(dsl_obj)) next
    
    tryCatch({
      # DSL object has: coefficients (vector), standard_errors (vector)
      coefs <- dsl_obj$coefficients
      ses <- dsl_obj$standard_errors
      
      # Get p-values from summary
      s <- summary(dsl_obj)
      # The summary prints a table with columns: Estimate, Std. Error, CI Lower, CI Upper, p value
      # Try to extract p-values from the summary coefficients matrix
      pvals <- if(!is.null(s) && is.matrix(s) && ncol(s) >= 5) {
        s[, 5]  # p value is 5th column
      } else if(!is.null(dsl_obj$internal$p_value)) {
        dsl_obj$internal$p_value
      } else {
        # Calculate p-values from z-scores (estimate / se)
        z <- coefs / ses
        2 * pnorm(-abs(z))
      }
      
      # Get sample size
      n_obs <- if(!is.null(dsl_obj$internal$data)) nrow(dsl_obj$internal$data) else length(coefs)
      n_list[[speaker_name]] <- n_obs
      
      # Create data frame with coefficient, SE, and p-value for each term
      coef_df <- tibble(
        term = names(coefs),
        estimate = as.numeric(coefs),
        std.error = as.numeric(ses),
        p.value = as.numeric(pvals)
      )
      
      coef_list[[speaker_name]] <- coef_df
      message(sprintf("  Extracted %d coefficients for %s", nrow(coef_df), speaker_name))
    }, error = function(e) {
      message(sprintf("  Error extracting for %s: %s", speaker_name, e$message))
    })
  }
  
  if (length(coef_list) == 0) {
    message(sprintf("  No coefficients extracted for %s - skipping", outcome))
    return(NULL)
  }
  
  # Get all unique terms across all models
  all_terms <- unique(unlist(lapply(coef_list, function(x) x$term)))
  
  # Build the table: each row is a term, alternating estimate/SE rows
  # Each column is a speaker
  table_data <- list()
  
  # Helper function to add significance stars

  add_stars <- function(est, pval) {
    if (is.na(pval)) return(sprintf("%.3f", est))
    stars <- if (pval < 0.001) "***" else if (pval < 0.01) "**" else if (pval < 0.05) "*" else ""
    sprintf("%.3f%s", est, stars)
  }
  
  for (term in all_terms) {
    # Row for estimate
    est_row <- list(term = term)
    # Row for SE
    se_row <- list(term = "")
    
    for (speaker_name in names(coef_list)) {
      coef_df <- coef_list[[speaker_name]]
      term_data <- coef_df[coef_df$term == term, ]
      
      if (nrow(term_data) > 0) {
        est_row[[speaker_name]] <- add_stars(term_data$estimate[1], term_data$p.value[1])
        se_row[[speaker_name]] <- sprintf("(%.3f)", term_data$std.error[1])
      } else {
        est_row[[speaker_name]] <- ""
        se_row[[speaker_name]] <- ""
      }
    }
    
    table_data[[paste0(term, "_est")]] <- as.data.frame(est_row, stringsAsFactors = FALSE)
    table_data[[paste0(term, "_se")]] <- as.data.frame(se_row, stringsAsFactors = FALSE)
  }
  
  # Combine into single data frame
  table_df <- bind_rows(table_data)
  
  # Add N row
  n_row <- list(term = "N")
  for (speaker_name in names(n_list)) {
    n_row[[speaker_name]] <- as.character(n_list[[speaker_name]])
  }
  table_df <- bind_rows(table_df, as.data.frame(n_row, stringsAsFactors = FALSE))
  
  # Rename term column and apply coefmap if available
  if (!is.null(coefmap)) {
    table_df$term <- sapply(table_df$term, function(t) {
      if (t %in% names(coefmap)) coefmap[[t]] else t
    })
  }
  
  # Rename first column
  names(table_df)[1] <- " "
  
  # Format outcome name for display
  outcome_label <- case_when(
    outcome == "credit_biden" ~ "Biden",
    outcome == "credit_senate" ~ "Senator",
    outcome == "credit_governor" ~ "Governor",
    TRUE ~ outcome
  )
  
  # Create table notes
  table_notes <- paste0(
    "\\textit{Notes:} Bias-adjusted regression estimates using DSL \\citep{egami2024}. ",
    "Unit of analysis is the project--actor pair. ",
    "Standard errors in parentheses. ",
    "* $p<0.05$, ** $p<0.01$, *** $p<0.001$."
  )
  
  table_caption <- sprintf("Bias-adjusted linear probability models of %s credit, by speaker (DSL)", outcome_label)
  
  # Create tinytable with notes
  group_header <- setNames(
    list(2:ncol(table_df)),
    paste0("Outcome: Credited ", outcome_label, " (=1)")
  )
  
  # Row index for N (last row) - add hline before it
  n_row_idx <- nrow(table_df)
  
  tt <- tinytable::tt(table_df, notes = table_notes, caption = table_caption) %>%
    tinytable::group_tt(j = group_header) %>%
    tinytable::style_tt(i = n_row_idx, line = "t", line_width = 0.05) %>%
    tinytable::theme_latex(resize_width = 0.55, resize_direction = "both", placement = "H")
  
  # Save to file
  tinytable::save_tt(tt, output = output_file, overwrite = TRUE)
  
  message(sprintf("  Saved: %s", output_file))
  
  return(tt)
}

# Generate tables for each outcome with table number prefixes
# Skip robustness versions in PNAS mode
if (!skip_robustness_tables) {
  dsl_table_numbers <- c(
    "credit_biden" = "S36",
    "credit_senate" = "S37",
    "credit_governor" = "S38"
  )

  for (outcome in credit_outcomes) {
    outcome_suffix <- gsub("credit_", "", outcome)
    table_num <- dsl_table_numbers[[outcome]]
    output_file <- here(
      "output", "pnas", "tables",
      sprintf("tab_%s_dsl_%s%s.tex", table_num, outcome_suffix, analysis_paths$suffix)
    )
    
    create_dsl_latex_table(
      outcome = outcome,
      results_list = dsl_results[[outcome]],
      output_file = output_file,
      coefmap = coefmap
    )
  }
}

message("\n=== DSL Bias-Adjusted Analysis Complete ===")